Site summary data
Biweekly summary of variables
Case Date Imputation - rule-based guesses
+3d from symptoms
+2d from test
-7d from end of isolation
Model for cases ~ .
Test high vs low traffic as FE in RE model
Finish written methods and results
Garbage data in, garbage inferences out… recurring problem for every project
Multiple imputation (mice) has a key assumption that data is MAR. But data is not MAR (arguably), it’s missing because of some dynamics not present in the data, like how much an admin cared to record cases when there were many of them. Using mice means making the analysis much more complicated (painfully so)…
Instead I used a simpler ad-hoc method: - if positive-test-result date is not available, then - use -5 days from end-of-isolation date, or if not available, - use the test date as is, or if not available, - use +3 days from symptoms-began date, or if not available,
It’s not really the most sound approach, but the data is questionable to begin with. It’s clear that the university gave up on collecting this data.
# Load ------------------------------------------------------
library(tidyverse)
library(here)
library(ggiraph)
library(patchwork)
ggplot2::theme_set(theme_bw())
source(here('R/report_plots.R'))
# Functions --------------------------------------------------
# creates yyyy-ww label for grouping data
get_date_week <- function(x){
y <- lubridate::year(x)
w <- lubridate::week(x) |> str_pad(2, 'left', 0)
str_glue("{y}-{w}")
}
# get biweekly bin date for vector of dates
get_date_biweekly <- function(x){
day_of_week <- lubridate::wday(x)
case_when(
day_of_week %in% 1:3 ~ x + 3 - day_of_week,
TRUE ~ x + 5 - day_of_week,
)
}
# dates 2021-2022 with biweekly groups
biweekly_date_sequence <- function(){
tibble(
date = seq(
as_date('2021-01-01'),
as_date('2022-12-31'),
by = 1)
) |>
mutate(biweek = get_date_biweekly(date))
}
# lookup table for date
week_midpoint_date_lookup <- function(start = '2021-01-01', end = '2023-01-01'){
ends = lubridate::as_date(c(start, end))
tibble(
date = seq(ends[1], ends[2], by = 1),
date_week = lubridate::week(date) |> str_pad(2, 'left', '0'),
date_year = lubridate::year(date),
week = str_glue("{date_year}-{date_week}"),
) |>
group_by(week) |>
summarise(date = mean(date))
}
binom_ci <- function(x, n){
Hmisc::binconf(x, n, method = 'wilson', alpha = 0.05) |>
as_tibble() |>
janitor::clean_names()
}
get_binom_ci <- function(data){
data |>
summarise(
x = sum(pcr_positive == 'Positive'),
n = n(),
binconf = map2(x, n, ~binom_ci(.x, .y))
) |>
unnest(binconf) |>
mutate(across(c(point_est, lower, upper), ~(.x*100) |> round(1)))
}
# Plot Elements ----------------------------------------------
theme_color <- 'cornflowerblue'
# layout x-axis
scale_x_study_dates <- function(){
scale_x_date(
date_breaks = 'month',
date_labels = month.name[c(9:12, 1:5)] |> strtrim(3),
limits = c(
min(swabs_tidy$date_swab),
max(swabs_tidy$date_swab)
))
}
# get rid of x-axis for vertically stacked plots
remove_x_labels <- function(){
theme(axis.ticks.x = element_blank(),
axis.text.x = element_blank())
}
# no minor grid
remove_grid <- function(){
theme(panel.grid.minor = element_blank(),
plot.subtitle = element_markdown(size = 8, hjust = 0),
plot.margin = unit(c(0.1, 1, 2, 0), unit = 'mm'),
plot.title.position = 'panel')
}
# Data -------------------------------------------------------
# swab data
swabs <-
read_rds(here('data/cube.rds')) |>
filter(str_detect(site, '^UO_'))
# lookup table for waste water site names
lookup_ww <- tribble(
~site_abbr, ~site,
'UO_AA', 'Annex Residence',
'UO_FA', 'Faculty of Social Sciences',
'UO_FT', 'Friel Residence',
'UO_NA', 'Northern sampling site',
'UO_RGN', 'Roger Guindon Hall',
'UO_ST', 'Southern sampling site',
'UO_TBT', 'Tabaret Hall'
)
# UOttawa waste water data from R. Delatolla
uottawa_ww <-
readxl::read_excel(here::here('data/ww_university.xlsx')) |>
janitor::clean_names() |>
mutate(sample_date = as_date(sample_date)) |>
filter(
sample_date > '2021-09-15',
sample_date < '2022-05-05',
!is.na(viral_copies_per_copies_pep_avg),
site != 'UO_RGN'
) |>
left_join(lookup_ww, by = c('site' = 'site_abbr')) |>
mutate(
source =
source_name |>
str_remove('^Data - uOttawa - ') |>
str_remove('.xlsx$')
) |>
transmute(site,
sample_date,
signal = viral_copies_per_copies_pep_avg)
# ottawa wastewater data: daily means
regional_ww <-
read_rds(here('data/ww_ottawa.rds')) |>
select(sample_date, starts_with('cov_')) |>
pivot_longer(contains('cov_')) |>
mutate(target = str_extract(name, 'cov_n.'),
stat = str_extract(name, 'mean|sd'),
) |>
select(-name) |>
pivot_wider(names_from = stat, values_from = value) |>
mutate(.lower = mean - sd, .upper = mean + sd)
# data from university of ottawa
wifi <-
read_rds(here('data/uo_wifi.rds')) |>
filter(date >= min(swabs$date_swab),
date <= max(swabs$date_swab))
# important dates for context
event_dates <-
tribble(
~event, ~start, ~end,
'Reading Week', '2021-10-24', '2021-10-30',
'Holiday Break', '2021-12-22', '2022-01-04',
'Closure', '2022-01-04', '2022-01-31',
'Closure', '2022-02-16', '2022-02-21',
'Reading Week', '2022-02-20', '2022-02-26',
) |>
mutate(across(start:end, as_date))
# UOttawa cases data
cases <-
read_rds(here('data/cases_rule_imputed.rds')) |>
select(case, role, case_date, UC:TBT) |>
mutate(biweek = get_date_biweekly(case_date)) |>
nest(associated_sites = UC:TBT) |>
print()
## # A tibble: 216 × 5
## case role case_date biweek associated_sites
## <dbl> <fct> <date> <date> <list>
## 1 1 Employee 2020-07-20 2020-07-21 <tibble [1 × 6]>
## 2 2 Employee 2020-08-04 2020-08-04 <tibble [1 × 6]>
## 3 3 Employee 2020-09-04 2020-09-03 <tibble [1 × 6]>
## 4 4 Student 2020-09-06 2020-09-08 <tibble [1 × 6]>
## 5 5 Student 2020-09-08 2020-09-08 <tibble [1 × 6]>
## 6 7 Employee 2020-09-10 2020-09-10 <tibble [1 × 6]>
## 7 8 Employee 2020-09-06 2020-09-08 <tibble [1 × 6]>
## 8 9 Student 2020-09-11 2020-09-10 <tibble [1 × 6]>
## 9 10 Student 2020-09-11 2020-09-10 <tibble [1 × 6]>
## 10 11 Employee 2020-09-14 2020-09-15 <tibble [1 × 6]>
## # … with 206 more rows
# Summaries --------------------------------------------------
swabs_tidy <- swabs |>
filter(!negative_control, swab_type != 'sponge') |>
select(date_swab, site, floor, location, pcr_positive, pcr_ct,
co2) |>
mutate(biweek = get_date_biweekly(date_swab))
positivity_summary <- function(swabs){
swabs |>
summarise(
n = n(),
x = sum(pcr_positive == 'Positive', na.rm = F),
positivity = round(100 * x / n, digits = 1),
.groups = 'drop')
}
co2_summary <- function(swabs){
swabs |>
summarise(
n = n(),
co2_vals = list(co2),
co2_mean = mean(co2, na.rm = T),
.groups = 'drop')
}
# overall summary
swab_summary <-
swabs_tidy |> positivity_summary()
# site summary
swab_summary_sites <- swabs_tidy |>
group_by(site) |>
positivity_summary()
# high-low traffic summary
swab_summary_location_traffic <- swabs_tidy |>
mutate(traffic = if_else(
str_detect(location, 'Site 1'), 'high traffic', 'low traffic'
)) |>
group_by(traffic) |>
positivity_summary()
## Figure 1: data aggregation ----
# uottawa cases - biweekly counts
cases_biweekly <-
cases |>
select(case, biweek, role) |>
group_by(biweek) |>
summarise(cases = n(),
cases_student = sum(role == 'Student'),
cases_staff = sum(role == 'Employee'),
) |>
right_join(
biweekly_date_sequence() |>
filter(biweek < '2022-02-03') |>
distinct(biweek),
by = 'biweek'
) |>
mutate(across(where(is.integer), ~if_else(is.na(.), 0L, .)))
# swabs biweekly summary
swabs_biweekly <-
swabs_tidy |>
group_by(biweek) |>
positivity_summary() |>
left_join(swabs_tidy |> group_by(biweek) |> co2_summary())
# swabs biweekly summary by site
swabs_biweekly_by_site <-
swabs_tidy |>
group_by(site, biweek) |>
positivity_summary() |>
left_join(swabs_tidy |> group_by(site, biweek) |> co2_summary())
# UOttawa ww biweekly means
uottawa_ww_biweekly <-
uottawa_ww |>
mutate(biweek = get_date_biweekly(sample_date)) |>
group_by(biweek) |>
summarise(signal = mean(signal),
n = n())
# daily ww data for study period
regional_ww_daily <-
regional_ww |>
filter(
sample_date >= min(swabs_tidy$date_swab) - 4,
sample_date <= max(swabs_tidy$date_swab) + 4,
) |>
group_by(sample_date) |>
summarise(mean = mean(mean))
# regional ww biweekly means
regional_ww_biweekly <-
regional_ww_daily |>
mutate(biweek = get_date_biweekly(sample_date)) |>
group_by(biweek) |>
summarise(mean = mean(mean))
# wifi site agg
wifi_extended <- read_rds(here('data/uo_wifi.rds')) |>
filter(date >= min(swabs$date_swab) - 2,
date <= max(swabs$date_swab) + 2) |>
mutate(biweek = get_date_biweekly(date))
# overall wifi biweekly timeseries
wifi_biweekly_sites <-
wifi_extended |>
group_by(biweek) |>
summarise(
n = n(),
min = min(clients),
mean = mean(clients),
max = max(clients)
)
# per building wifi biweekly timeseries
wifi_biweekly <-
wifi_biweek_sites <-
wifi_extended |>
group_by(biweek) |>
summarise(
n = n(),
min = min(clients),
mean = mean(clients),
max = max(clients)
)
# Map --------------------------------------------------------
swab_coords <- tribble(
~site, ~name, ~lat, ~lng,
'MRT', 'Morrisette Hall (MRT)', 45.423239101483546, -75.68428970961207,
'MNT', 'Montpetit', 45.42254171025894, -75.68265871464943,
'90U', '90 University', 45.42242555707402, -75.6850144991752,
'DMS', 'Desmarais Bldg.', 45.42387679340988, -75.68727075448753,
'TBT', 'Tabaret Hall', 45.42450940162542, -75.68631900184072,
'LPR', 'Louis Pasteur Bldg.', 45.421375668614466, -75.68026389993058,
)
ww_coords <- tribble(
~site, ~name, ~lat, ~lng,
'aa', 'Annex Residence', 45.42678129026445, -75.68034405960745,
'fa', 'Faculty of Soc. Sci.', 45.421624722628515, -75.68390386012699,
'ft', 'Friel Residence', 45.43043440080566, -75.68290766533741,
'tbt', 'Tabaret Hall', 45.42450940162542, -75.68631900184072
# 'na', 'North sampling Site', NA, NA,
# 'st', 'South Sampling Site', NA, NA, # Nobody knows where this is
)
figure_sites_map <-
leaflet::leaflet(swab_coords, width = 600, height = 330) |>
leaflet::addProviderTiles(leaflet::providers$CartoDB.Positron) |>
leaflet::setView(lng = -75.6843, lat = 45.4235, zoom = 14) |>
leaflet::addCircleMarkers(~lng, ~lat, label = ~name, popup = ~name,
radius = 2, color = '#440099') |>
leaflet::addLabelOnlyMarkers(
~lng, ~lat, label = ~site,
labelOptions = leaflet::labelOptions(
noHide = T, direction = 'top', textOnly = T,
)) |>
leaflet::addCircleMarkers(~lng, ~lat, label = ~site,
radius = 2, color = '#440099') |>
leaflet::addCircleMarkers(
data = ww_coords, ~lng, ~lat, label = ~name, popup = ~name,
radius = 4, color = '#f96999')
# Results ----------------------------------------------------
rs_data <- lst(
swabs_collected = nrow(swabs_tidy),
positivity_ovr = swab_summary$positivity,
positivity_site_min = min(swab_summary_sites$positivity),
positivity_site_max = max(swab_summary_sites$positivity),
)
rs_abstract <- rs_data |>
glue::glue_data(
"Over the 32-week study period, we collected {swabs_collected} swabs at six university buildings, including two buildings where wastewater surveillance was currently ongoing.
Overall, {positivity_ovr}% of swabs were PCR-positive for SARS-CoV-2, with individual buildings ranging between {positivity_site_min}% and {positivity_site_max}%.
*Comment on when spikes in cases, swabs, and waste-water occurred….* *Comment on prediction of cases using swabs, ww, or combined approach…*
"
)
rs_results <- rs_data |>
glue::glue_data()
# fig1 -----
fig1_timeline <-
event_dates |>
ggplot(aes(x = start, xend = end, y = event, yend = event)) +
geom_segment(linewidth = 5, lineend = 'round',
color = theme_color,
alpha = 0.35) +
geom_segment(linewidth = 1, lineend = 'butt',
color = theme_color,
alpha = 0.9) +
geom_point(aes(x = end), color = theme_color, size = 2) +
geom_point(color = theme_color, size = 2) +
scale_x_study_dates() +
labs(y = '', x = NULL, subtitle = 'Timeline') +
remove_x_labels() +
remove_grid()
fig1_cases <-
cases_biweekly |>
mutate(biweek = if_else(wday(biweek) == 5, biweek + 0.75, biweek - 0.75)) |>
select(biweek, cases_student, cases_staff) |>
pivot_longer(-biweek) |>
mutate(name = str_remove(name, 'cases_') |> str_to_title()) |>
ggplot() +
geom_vline(
data = tibble(x = as_date('2022-02-03')),
aes(xintercept = x),
lty = 2,
color = 'gray'
) +
geom_text(
data = tibble(
x = as_date('2022-02-05'),
y = 18,
label = 'End of data'
),
aes(x, y, label = label),
size = 2,
hjust = 0
) +
geom_col(
aes(biweek, value, fill = name),
size = 0.15,
width = 3.5,
color = 'gray80',
position = position_stack()
) +
labs(
x = NULL,
y = NULL,
fill = NULL,
subtitle = 'UOttawa COVID-19 Cases',
) +
scale_fill_carto_d() +
scale_x_study_dates() +
remove_grid() +
remove_x_labels() +
theme(
axis.title.y = ggtext::element_markdown(lineheight = 1),
legend.position = c(0.9, 0.4),
legend.key.size = unit(2, 'mm'),
legend.background = element_rect(color = 'grey80')
)
fig1_swabs <-
swabs_biweekly |>
ggplot(aes(biweek, positivity)) +
geom_smooth(
size = 0.5,
alpha = 0.2,
fill = theme_color,
span = 0.4,
method = 'loess',
formula = 'y ~ x',
na.rm = T) +
geom_point(color = theme_color, alpha = 0.85, shape = 1, size = 1) +
scale_x_study_dates() +
labs(x = NULL, y = NULL,
subtitle = 'Swab Positivity (%)') +
remove_x_labels() +
remove_grid()
fig1_co2 <-
ggplot(swabs_biweekly, aes(biweek, co2_mean)) +
geom_point(color = theme_color, shape = 1, alpha = 0.85, size = 1) +
geom_smooth(fill = theme_color, alpha = 0.2, size = 0.5) +
labs(x = NULL, y = NULL,
subtitle = 'CO~2~ (ppm)') +
scale_x_study_dates() +
remove_x_labels() +
remove_grid()
fig1_wifi <-
wifi_biweekly |>
ggplot(aes(biweek, mean)) +
geom_smooth(span = 0.5,
size = 0.5,
alpha = 0.2,
fill = theme_color) +
geom_point(color = theme_color, alpha = 0.85, shape = 1, size = 1) +
labs(x = NULL, y = NULL,
subtitle = "Wi-Fi Connections") +
scale_x_study_dates() +
remove_x_labels() +
remove_grid()
fig1_uo_ww <-
ggplot(uottawa_ww_biweekly,
aes(biweek, signal)) +
geom_smooth(alpha = 0.2, size = 0.5, fill = theme_color, na.rm = T) +
geom_point(aes(size = n),
color = theme_color, alpha = 0.85,
shape = 1,
na.rm = T,
show.legend = F) +
labs(
x = NULL,
y = NULL,
subtitle = 'University Waste-Water',
) +
scale_x_study_dates() +
scale_size(range = c(1, 3)) +
remove_x_labels() +
remove_grid()
fig1_ott_ww <-
ggplot(regional_ww_daily) +
geom_smooth(aes(sample_date, mean),
na.rm = T,
method = 'loess', formula = 'y ~ x',
span = 0.2, size = 0.5, alpha = 0.2,
fill = theme_color) +
geom_point(data = regional_ww_biweekly,
aes(biweek, mean),
na.rm = T,
alpha = 0.85, shape = 1, size = 1,
color = theme_color) +
labs(x = 'Date',
y = NULL,
subtitle = 'Regional Waste-Water',
) +
scale_x_study_dates() +
remove_grid() +
theme(axis.title.y = ggtext::element_markdown(lineheight = 1))
figure_1A <- wrap_plots(
fig1_timeline, fig1_cases, fig1_swabs, fig1_co2,
fig1_wifi, fig1_uo_ww, fig1_ott_ww
) &
plot_layout(ncol = 1, heights = c(1))
rm(fig1_timeline, fig1_swabs, fig1_co2, fig1_wifi, fig1_uo_ww, fig1_ott_ww, fig1_cases)
Over the 32-week study period, we collected 566 swabs at six university buildings, including two buildings where wastewater surveillance was currently ongoing. Overall, 13.4% of swabs were PCR-positive for SARS-CoV-2, with individual buildings ranging between 4.7% and 34%. Comment on when spikes in cases, swabs, and waste-water occurred…. Comment on prediction of cases using swabs, ww, or combined approach…
| Site | N | PCR-Positive | % |
|---|---|---|---|
| Overall | 566 | 76 | 13.4 |
| UO_90U | 100 | 34 | 34.0 |
| UO_DMS | 88 | 6 | 6.8 |
| UO_MRT | 92 | 13 | 14.1 |
| UO_MNT | 100 | 9 | 9.0 |
| UO_TBT | 86 | 4 | 4.7 |
| UO_LPR | 100 | 10 | 10.0 |
| Traffic level | N | PCR-Positive | % |
|---|---|---|---|
| high traffic | 286 | 43 | 15.0 |
| low traffic | 280 | 33 | 11.8 |
Figure 1: Time-series of (top to bottom) important events at the university, proportion of PCR-positive swabs, biweekly mean ambient C02 (across collection sites), biweekly mean peak number of Wi-Fi connections (across 5 buildings), biweekly mean waste-water signal (across up to 6 collection sites; relative to PPMoV), biweekly mean regional waste-water detection (relative to PPMoV).
| term | Swab PCR | CO2 | Wi-Fi | University WW | Ottawa WW | Cases |
|---|---|---|---|---|---|---|
| CO2 | -0.19 | NA | NA | NA | NA | NA |
| Wi-Fi | -0.27 | 0.11 | NA | NA | NA | NA |
| University WW | 0.63 | 0.02 | -0.21 | NA | NA | NA |
| Ottawa WW | 0.70 | -0.23 | -0.31 | 0.67 | NA | NA |
| Cases | 0.74 | -0.19 | -0.36 | 0.72 | 0.5 | NA |
Where are these collection sites?
Pink markers represent waste-water collection sites; purple markers represent buildings where surface testing was performed. Only one building, Tabaret Hall, had both surface and waste-water testing. The locations of two waste-water sampling sites could not be ascertained and are not displayed on the map.
It remains unclear which buildings the ‘South Sampling Site’ and ‘North Sampling Site’ catchment areas are related to (not mapped).
I’ve removed the off-campus site (Robert Guidon Hall) and retained the other 6 near-campus sites (raw data below).
This plot shows the counts of positive (red) and negative (yellow) samples collected at each facility over time (x-axis). Samples that could not be tested are shown in navy. Only flocked swabs are shown. (Other sponge swabs were collected on 2022-04-28 were for comparing flocked and sponge swabs.)
This plot shows the counts of positive and negative samples collected at each facility over time.
This section contains results from modeling SARS-CoV-2 cases at UO using swab-PCR results as a predictor.
We created a random intercepts logistic regression model with the occurrence of cases (binary) as outcome and swab results for the previous week (the proportion of positive swabs) as predictor. The model has a random intercept for each site.
Our model formula is
cases ~ positives[lag 1 week] + (1|site).
The model is fit as follows:
swab_model <-
blme::bglmer(
cases_binary ~ detection_lag_1week + (1 | site),
data = uo_sites,
family = binomial
)
These plots show the swab results, cases, and predictions by the
current model.
These tables show the model coefficients and statistics.
| nobs | sigma | logLik | AIC | BIC | deviance | df.residual |
|---|---|---|---|---|---|---|
| 84 | 1 | -24.52 | 55.04 | 62.33 | 42.83 | 81 |
| effect | group | term | estimate | std.error | statistic | p.value |
|---|---|---|---|---|---|---|
| fixed | NA | (Intercept) | -3.301 | 0.804 | -4.107 | 0.0000401 |
| fixed | NA | detection_lag1w | 7.114 | 2.112 | 3.369 | 0.0007549 |
| ran_pars | site | sd__(Intercept) | 1.067 | NA | NA | NA |
| Relation between UO cases (y/n) and proportion of positive swabs the previous week | |||
|---|---|---|---|
| Predictors | Odds Ratios | CI | p |
| (Intercept) | 0.04 | 0.01 – 0.18 | <0.001 |
| Swabs @ t-1week | 1229.31 | 19.59 – 77128.28 | 0.001 |
| Random Effects | |||
| σ2 | 3.29 | ||
| τ00 site | 1.14 | ||
| ICC | 0.26 | ||
| N site | 6 | ||
| Observations | 84 | ||
| Marginal R2 / Conditional R2 | 0.378 / 0.538 | ||
This plot shows the modelling data as points (detection lagged 1 week and cases at a given site- y/n), as well as how the probability of future cases varies by the previous weeks detection level and site, according to our model (curves).
This plot shows the odds ratios for the intercepts of the model (an intercept for each site).
This panel shows the cases counts at UO over the course of our sampling period. The case data shown represents the days on which a positive test was reported (black rug lines) and the presumed start of transmissiblity for each case (red lines).
This panel shows linked data from swab results, CO2
readings, and wifi traffic (number of users @ peak, daily) during our
study period. Unfortunately, we do not have wifi data for 90U.
This panel shows a time-series of the daily peak number of wifi users
at UO facilities. Sampling days are highlighted in blue.